from utils.datasets.datasetLoader import dataLoader
from utils.preprocessing.preprocessor import Preprocessor
from utils.path.files import getFiles
from sklearn.preprocessing import LabelEncoder
import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage.feature import hog
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os
import glob
import warnings
# Render matplotlib figures inline in the notebook.
%matplotlib inline
# Silence library warnings (e.g. deprecation noise) to keep notebook output clean.
warnings.filterwarnings("ignore")
The data set provided by Udacity has over 17k images that belong to two classes, non-vehicle and vehicle. I used this data set to train a linear SVM model for vehicle detection.
# --- Load the dataset -------------------------------------------------------
# Define input image width and height (the classifier works on 64x64 patches).
img_width, img_height = 64, 64
_preprocessor = Preprocessor(img_width, img_height)
loader = dataLoader(preprocessor=[_preprocessor])
# Collect every .png under ./data; the class label is taken from a path
# component (indexAsClass=-3) — presumably the vehicle / non-vehicle folder
# name; verify against getFiles/dataLoader.  NOTE(review): colorSpace is
# spelled 'YcrCb' here — confirm the loader accepts this exact spelling.
filePaths = getFiles(r'data', '.png')
X, Y = loader.load(filePaths, indexAsClass=-3, colorSpace='YcrCb')
# Encode the string class labels into integers for the classifier.
label_encoder = LabelEncoder()
Y = label_encoder.fit_transform(Y)
I used HOG, spatially binned color, and color histogram features to train a machine learning model.
# Define a function to return HOG features and, optionally, a visualization
def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    """Compute HOG features for a single image channel.

    Parameters
    ----------
    img : 2-D array — one image channel.
    orient : int — number of gradient orientation bins.
    pix_per_cell : int — cell size in pixels (square cells).
    cell_per_block : int — block size in cells (square blocks).
    vis : bool — when True also return the HOG visualization image.
    feature_vec : bool — when True return the features flattened to 1-D.

    Returns
    -------
    features, or (features, hog_image) when ``vis`` is True — exactly what
    ``skimage.feature.hog`` returns for ``visualise=vis``.
    """
    # A single call covers both cases: hog() itself returns a
    # (features, hog_image) tuple when visualisation is requested, so the
    # original duplicated if/else branches were unnecessary.
    # NOTE(review): scikit-image renamed this keyword to `visualize`
    # (`visualise` deprecated in 0.14 and later removed) — update the
    # keyword if the installed scikit-image rejects it.
    return hog(img, orientations=orient,
               pixels_per_cell=(pix_per_cell, pix_per_cell),
               cells_per_block=(cell_per_block, cell_per_block),
               transform_sqrt=True,
               visualise=vis, feature_vector=feature_vec)
# Visual sanity check: compare HOG of a non-vehicle sample and a vehicle sample.
for sample in (X[2], X[10000]):
    # HOG on the first (Y) channel only, with visualization enabled.
    hog_features, hog_image = get_hog_features(sample[:, :, 0], 10,
                                               8, 2, vis=True, feature_vec=True)
    fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(5, 5))
    left_ax.imshow(sample)
    left_ax.set_title('Input')
    right_ax.imshow(hog_image, cmap='gray')
    right_ax.set_title('Hog features')
    plt.show()
def bin_spatial(img, size=(32, 32)):
    """Spatial-binning feature: shrink *img* to *size* and flatten to 1-D."""
    small = cv2.resize(img, size)
    # Flattened pixel values are the feature vector.
    return small.ravel()
# Plot the spatially binned feature vector for one non-vehicle (index 2)
# and one vehicle (index 10000) sample.
for sample_idx, plot_title in ((2, 'Non-vehicle Spatially Binned Features'),
                               (10000, 'Vehicle-Spatially Binned Features')):
    feature_image = cv2.cvtColor(X[sample_idx], cv2.COLOR_BGR2RGB)
    feature_vec = bin_spatial(feature_image, size=(32, 32))
    plt.plot(feature_vec)
    plt.title(plot_title)
    plt.show()
# Define a function to compute color histogram features
# NEED TO CHANGE bins_range if reading .png files with mpimg!
def color_hist(img, nbins=32, bins_range=(0, 256)):
    """Concatenated per-channel color histogram of a 3-channel image.

    Returns a 1-D array of length 3 * nbins (bin counts only).
    """
    # Histogram each of the three channels, then join the counts.
    per_channel = [np.histogram(img[:, :, ch], bins=nbins, range=bins_range)[0]
                   for ch in range(3)]
    return np.concatenate(per_channel)
# Plot color-histogram features for a non-vehicle (index 0) and a
# vehicle (index 10000) sample.
for sample_idx in (0, 10000):
    feature_image = cv2.cvtColor(X[sample_idx], cv2.COLOR_BGR2RGB)
    hist_features = color_hist(feature_image, nbins=32)
    plt.hist(hist_features)
    plt.title('Color Histogram Features')
    plt.show()
# Define a function that extracts the combined feature vector from an image
def featureExtractor(image):
    """Build the full feature vector for one 3-channel image patch.

    Concatenates HOG features of all three channels, spatially binned
    color features and per-channel color histograms into one 1-D array.
    """
    orient = 9               # HOG orientations
    pix_per_cell = 8         # HOG pixels per cell
    cell_per_block = 2       # HOG cells per block
    spatial_size = (32, 32)  # Spatial binning dimensions
    hist_bins = 32           # Number of histogram bins
    # HOG is computed independently on each channel.
    hog_channels = [get_hog_features(image[:, :, ch],
                                     orient=orient,
                                     pix_per_cell=pix_per_cell,
                                     cell_per_block=cell_per_block,
                                     vis=False,
                                     feature_vec=True)
                    for ch in range(3)]
    spatial_features = bin_spatial(image, size=spatial_size)
    hist_features = color_hist(image, nbins=hist_bins)
    return np.hstack(hog_channels + [spatial_features, hist_features]).ravel()
# --- Build the training matrix ----------------------------------------------
# Extract the combined feature vector for every loaded image.
X = np.array([featureExtractor(x) for x in X])
# Fit a per-column scaler on the whole matrix.  The same X_scaler is reused
# at detection time (see VehicleDetector.vehicleDetection), so it must stay
# in scope at module level.
X_scaler = StandardScaler().fit(X)
scaled_X = X_scaler.transform(X)
# Randomize the data so the split below is not ordered by class.
X, Y = shuffle(scaled_X, Y)
# Split the data into train set (80%) and test set (20%).
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
from sklearn.svm import LinearSVC

# Train a linear SVM on the scaled feature vectors.
svc = LinearSVC()
svc.fit(X_train, y_train)
# Report held-out accuracy to 3 decimal places.
# Bug fix: the original spec '{:3f}' is a field *width* of 3 (it still
# printed six decimals); the intended precision spec is '{:.3f}'.
print('Model accuracy: {:.3f}'.format(svc.score(X_test, y_test)))
import pickle  # pickle module for model serialization
# Save the trained model to disk.
with open('svc_ver1.pickle', 'wb') as f:
    pickle.dump(svc, f)
# Load the pre-trained model back (lets later cells run without retraining).
# NOTE(review): unpickling is only safe for files you produced yourself.
with open('svc_ver1.pickle', 'rb') as f:
    svc = pickle.load(f)
To find out how many sliding windows I needed, I observed where the vehicles appeared in the video. I started this task inside After Effects, a video editing program, and eventually I found 8 window sizes that covered almost all the vehicles, so I used these sliding windows for vehicle detection.

from skimage.morphology import label
from scipy.ndimage.measurements import label as lb
from collections import deque
import collections
# Parameters of one sliding-window scan region: x/y bounds of the region,
# step sizes, and the window dimensions (all in pixels of the full frame).
SlidingWindow_parameters = collections.namedtuple('SlidingWindow', 'startX endX startY endY strideX strideY width height')
# Per-vehicle state kept between frames for optical-flow tracking:
# grayscale frame, last centroid location, and bounding-box size.
vehicle = collections.namedtuple('Vehicle', 'imageLastFrame locationLastFrame boundingBox_Width boundingBox_Height')
class VehicleDetector:
    """Sliding-window vehicle detector with multi-frame heat-map smoothing.

    A linear SVM (module-level ``svc`` together with ``X_scaler``) classifies
    64x64 patches cut out by several hand-tuned sliding windows; positive
    patches vote into a heat map that is thresholded per frame and summed
    over the last 5 frames to suppress false positives.  A Lucas-Kanade
    optical-flow tracker (``tracking``) exists but is currently disabled in
    ``pipeline``.
    """

    def __init__(self):
        # Vehicles found by the last full detection pass (list of `vehicle`).
        self.vehicles_detected = []
        self.num_previousVehicles_detected = 0
        # Lucas-Kanade optical-flow parameters for cv2.calcOpticalFlowPyrLK.
        self.lk_params = dict( winSize = (15,15),
                               maxLevel = 2,
                               criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
        # Per-frame heat maps of the most recent 5 frames (newest first).
        self.heatmapQue = deque(maxlen = 5)
        # Hand-tuned scan regions and window sizes (found by inspecting the
        # project video — see the notes above this class).
        self.searchWindows = [
            SlidingWindow_parameters(750, 930, 390, 420, 6, 4, 90, 60),
            SlidingWindow_parameters(750, 1180, 400, 420, 10, 6, 100, 65),
            SlidingWindow_parameters(850, 980, 390, 410, 13, 7, 135, 75),
            SlidingWindow_parameters(920, 1020, 395, 415, 16, 8, 165, 85),
            SlidingWindow_parameters(750, 860, 390, 410, 11, 7, 110, 75),
            SlidingWindow_parameters(700, 1100, 380, 420, 23, 13, 225, 145),
            SlidingWindow_parameters(980, 1100, 385, 405, 23, 13, 250, 120),
            SlidingWindow_parameters(700, 1100, 385, 405, 16, 13, 160, 135),
            SlidingWindow_parameters(700, 1100, 385, 405, 28, 20, 285, 205)
        ]

    # A sliding window scans an input image from top to bottom and left to right.
    def sliding_window(self, image, x1, x2, y1, y2, stepSizeX, stepSizeY, windowSize):
        """Yield (x, y, patch) for every window position in the region.

        windowSize is (width, height).  Patches near the image border may be
        smaller than windowSize because numpy slicing clips silently.
        """
        for y in range(y1, y2, stepSizeY):
            for x in range(x1, x2, stepSizeX):
                # yield the current window
                yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

    # Draw bounding boxes to represent the vehicles that are detected
    def draw_labeled_bboxes(self, img, labels):
        """Draw one bounding box per connected heat-map component.

        `labels` is the (label_array, count) pair returned by
        scipy.ndimage's label().  Returns (annotated image, list of kept
        box corner pairs, [width, height] of the LAST box examined).
        """
        locations = []
        # Iterate through all detected cars
        for car_number in range(1, labels[1]+1):
            # Find pixels with each car_number label value
            nonzero = (labels[0] == car_number).nonzero()
            # Identify x and y values of those pixels
            nonzeroy = np.array(nonzero[0])
            nonzerox = np.array(nonzero[1])
            # Define a bounding box based on min/max x and y
            bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
            # Reject implausibly elongated boxes (aspect ratio > 6 either way).
            # NOTE(review): a 1-pixel-thin component makes a denominator 0 and
            # raises ZeroDivisionError — confirm this cannot occur after the
            # heat-map thresholding upstream.
            if (abs(bbox[0][0] - bbox[1][0]) / abs(bbox[0][1] - bbox[1][1]) > 6) or (abs(bbox[0][1] - bbox[1][1]) / abs(bbox[0][0] - bbox[1][0]) > 6):
                pass
            else:
                locations.append([bbox[0], bbox[1]])
                cv2.rectangle(img, bbox[0], bbox[1], (0,0,255), 6)
        # NOTE(review): the returned size uses `bbox` from the last loop
        # iteration only, and raises NameError when labels[1] == 0 — callers
        # guard with labels[0].max() > 0 before calling this method.
        return img, locations, [bbox[1][0] - bbox[0][0], bbox[1][1] - bbox[0][1]]

    def vehicleDetection(self, image, threshold = 25, threshold2=150, isMainMethod = True):
        """Run full sliding-window detection on one frame.

        threshold   — per-frame heat cutoff (values below are zeroed).
        threshold2  — cutoff applied to the 5-frame accumulated heat map.
        isMainMethod — True: draw boxes, update tracked-vehicle state and
                       return the annotated image; False: return only the
                       number of vehicles found (0 when none).

        NOTE(review): converts with COLOR_RGB2YCR_CB although frames read
        via cv2.VideoCapture are BGR — confirm which channel ordering the
        training data actually used.
        """
        ycrcb = cv2.cvtColor(image.copy(), cv2.COLOR_RGB2YCR_CB)
        heatmap = np.zeros(image.shape[:-1])
        # Scan through the input image using the different window configurations.
        for i, window in enumerate(self.searchWindows):
            for x, y, block in self.sliding_window(ycrcb,
                                                   window.startX,
                                                   window.endX,
                                                   window.startY,
                                                   window.endY,
                                                   window.strideX,
                                                   window.strideY,
                                                   (window.width, window.height)):
                # Resize each patch to the 64x64 size the SVM was trained on.
                resized = np.array(cv2.resize(block, (64, 64)))
                features = featureExtractor(resized)
                # NOTE(review): transform() is handed a 1-D vector; newer
                # scikit-learn requires features.reshape(1, -1).
                scaled = X_scaler.transform(features)
                y_pred = svc.predict(np.array([scaled]))[0]
                # If the patch seems to contain a vehicle, add heat at its location.
                if y_pred != 0:
                    heatmap[y:y+window.height,x:x+window.width] += 1
        # Zero out weak responses, then push this frame's heat map onto the queue.
        heatmap[heatmap < threshold] = 0
        self.heatmapQue.appendleft(heatmap)
        # Sum up the heat maps of the previous (up to) 5 frames.
        if len(self.heatmapQue) > 0:
            heatmap_sum = np.zeros(image.shape[:-1])
            for instance in self.heatmapQue:
                heatmap_sum += instance
            heatmap_sum[heatmap_sum < threshold2] = 0
            # Label connected components of the accumulated heat map.
            labels = lb(heatmap_sum)
            if labels[0].max() > 0:
                image, locations, (width, height) = self.draw_labeled_bboxes(image.copy(), labels)
                if isMainMethod:
                    # Remember each detection (gray frame + centroid + box
                    # size) so tracking() can follow it in later frames.
                    self.vehicles_detected = []
                    for location1, location2 in locations:
                        self.vehicles_detected.append(vehicle(cv2.cvtColor(image.copy(),cv2.COLOR_RGB2GRAY),
                                                              np.array([[[(location1[0] + location2[0]) / 2, (location1[1] + location2[1]) / 2]]], dtype='float32'),
                                                              width, height))
                    self.num_previousVehicles_detected = len(self.vehicles_detected)
                    return image
                else:
                    return len(locations)
            else:
                if isMainMethod:
                    return image
                else:
                    return 0

    def tracking(self, imgThisFrame):
        """Propagate previously detected vehicles into this frame with
        Lucas-Kanade optical flow instead of re-running full detection."""
        availiable = []
        for index, current_vehicle in enumerate(self.vehicles_detected):
            gray = cv2.cvtColor(imgThisFrame.copy(), cv2.COLOR_BGR2GRAY)
            p1, st, err = cv2.calcOpticalFlowPyrLK(current_vehicle.imageLastFrame,
                                                   gray,
                                                   current_vehicle.locationLastFrame,
                                                   None,
                                                   **self.lk_params)
            # st == 1 means the flow for this point was found.
            if st[0][0] == 1:
                cv2.rectangle(imgThisFrame, (int(p1[0][0][0] - current_vehicle.boundingBox_Width //2), int(p1[0][0][1] - current_vehicle.boundingBox_Height//2)),
                              (int(p1[0][0][0] + current_vehicle.boundingBox_Width //2), int(p1[0][0][1] + current_vehicle.boundingBox_Height//2)),
                              (0, 0, 255), 3)
                # Heuristic box-size adaptation: grow/shrink slightly with the
                # motion direction as the car approaches or recedes.
                movingForwardY_coef = p1[0][0][1] - current_vehicle.locationLastFrame[0][0][1]
                movingForwardX_coef = p1[0][0][0] - current_vehicle.locationLastFrame[0][0][0]
                dist = np.linalg.norm(p1 - current_vehicle.locationLastFrame)
                availiable.append(vehicle(gray, p1,
                                          current_vehicle.boundingBox_Width + movingForwardX_coef * 0.15,
                                          current_vehicle.boundingBox_Height + movingForwardY_coef * 0.3))
        # Keep only vehicles whose flow was successfully found.
        if len(availiable) > 0:
            self.vehicles_detected = availiable
        else:
            self.vehicles_detected = []
        return imgThisFrame

    def pipeline(self, image):
        """Per-frame entry point (used by moviepy's fl_image).

        Currently always runs full detection; the tracking fast-path below
        is disabled.
        """
        return self.vehicleDetection(image)
        # if len(self.vehicles_detected) > 0:
        # # return self.tracking(image)
        # num_currentVehicle = self.vehicleDetection(image, isMainMethod=False)
        # print(num_currentVehicle, self.num_previousVehicles_detected)
        # if num_currentVehicle == self.num_previousVehicles_detected or num_currentVehicle == 0:
        # # print('Tracking!')
        # return self.tracking(image)
        # else:
        # # print('Number of vehicle detected has changed!', num_currentVehicle, self.num_previousVehicles_detected)
        # return self.vehicleDetection(image)
        # else:
        # return self.vehicleDetection(image)
import matplotlib.pyplot as plt

# Quick visual check: run the pipeline on frames 500-519 of the project video.
test = VehicleDetector()
videoReaer = cv2.VideoCapture('project_video.mp4')
counter = 0
while True:
    (ret, frame) = videoReaer.read()
    # Bug fix: the original only broke out on a failed read *inside* the
    # 500-519 window, so the loop spun forever once counter passed 520
    # (or if the video had fewer than 500 frames).  Stop on end-of-video
    # or once the preview window has been shown.
    if not ret or counter >= 520:
        break
    if counter >= 500:
        detection = test.pipeline(frame.copy())
        plt.figure(figsize=(10, 10))
        # OpenCV frames are BGR; convert for matplotlib display.
        plt.imshow(cv2.cvtColor(detection, cv2.COLOR_BGR2RGB))
        plt.show()
    counter += 1
# Free the capture handle.
videoReaer.release()
### Import everything needed to edit/save/watch video clips
from moviepy.editor import VideoFileClip
from IPython.display import HTML
# Vehicle detector instance used as the per-frame processor.
tracker = VehicleDetector()
# Output path for the annotated video.
white_output = 'project_output_v12.mp4'
clip1 = VideoFileClip("project_video.mp4")
# fl_image applies pipeline() to every frame of the clip.
# NOTE(review): moviepy delivers RGB frames while the OpenCV preview loop
# above fed BGR frames — confirm the color handling is consistent.
white_clip = clip1.fl_image(tracker.pipeline) #NOTE: this function expects color images!!?
%time white_clip.write_videofile(white_output, audio=False)